%This program runs the simulations behind Fig. OA.5.

%It runs K experiments in which a Q-learning algorithm chooses between two
%actions, 1 and 2. Each experiment stops once at least "min_episodes"
%episodes have elapsed and the same action has been played "threshold" times.

% Start from an empty workspace.
clear

% Start the wall-clock timer; it is read back by the toc at the end of the
% script. When the data were produced the running time was 78 seconds.
tic

% ---- Parameters of the experiment ----

% K: number of experiments.
K = 1000;

% min_episodes: minimum number of episodes an experiment runs before it
% is allowed to stop.
min_episodes = 50000;

% alpha: learning rate.
alpha = 1;

% beta: exploration decay rate.
beta = 0.0008;

% p1, p2: probability of receiving a high payoff with action 1 and with
% action 2, respectively.
p1 = 0.1;
p2 = 0.9;

% pi1, pi2: value of the asset in both states (h and l) with action 1 and
% with action 2, respectively.
pi1 = [4, 0];
pi2 = [2, 0];

% Parallel computing initialization
% Can be commented out to run without parallelization
delete(gcp("nocreate"));  % close any pool that is already running (no-op if none)
myCluster = parcluster('local'); % cluster object for the 'local' profile

% Ask for up to 8 workers, but never more than the profile allows:
% parpool raises an error when the requested pool size exceeds
% myCluster.NumWorkers (e.g. on a machine with fewer than 8 cores).
numWorkers = min(8, myCluster.NumWorkers);
parpool(myCluster, numWorkers);


%For Panel A: Fixed stopping time
    % threshold = 0 means the experiment stops after min_episodes and a
    % repetition of the same outcome 0 times, hence the stopping time is
    % exactly min_episodes.
    threshold = 0;

    % Make sure the output subfolder exists BEFORE running the simulation:
    % writematrix does not create missing folders, and failing only after
    % the simulation has finished would waste the whole run.
    if ~isfolder('Data')
        mkdir('Data');
    end

    %run K simulations using simulation_rep
    [all_last_episodes_K] = simulation_rep(K,alpha,p1,p2,pi1,pi2,threshold,beta,min_episodes);

    %Save the output in the subfolder "Data"
    writematrix(all_last_episodes_K,'Data/last_episodes_fixed.txt');

%For Panel B: Random stopping time

    % threshold = 100 means the experiment stops after min_episodes and a
    % repetition of the same outcome 100 times, hence the stopping time is
    % random.
    threshold = 100;

    % Make sure the output subfolder exists BEFORE running the simulation:
    % writematrix does not create missing folders, and failing only after
    % the simulation has finished would waste the whole run.
    if ~isfolder('Data')
        mkdir('Data');
    end

    %run K simulations using simulation_rep
    [all_last_episodes_K] = simulation_rep(K,alpha,p1,p2,pi1,pi2,threshold,beta,min_episodes);

    %Save the output in the subfolder "Data"
    writematrix(all_last_episodes_K,'Data/last_episodes_random.txt');

    % Report the elapsed time since the tic at the top of the script.
    toc